* NOTE: if Stata does not do so automatically, cd to the top level directory of the data depository (where this script is stored)
* requires user-written packages colorpalette, grc1leg2, grstyle, sadi, sq (all from SSC)
clear all



/****************************************************************************
******************************************************************************
	I. DOCUMENT VIEWS
******************************************************************************
****************************************************************************/

/***********************************************************
	Fig. 1
************************************************************/
use data\sequence.dta, clear
sqset doc ID eventtime
grstyle init
grstyle set plain, nogrid
grstyle set color cblind
forvalues c==1/7 {
	preserve
	keep if country==`c'
	egen id = group(ID)
	sqset doc id eventtime
	sqindexplot, scheme(_GRSTYLE_) plotregion(margin(zero)) title("`: label country `c''") xtitle("Normalized Time") xlabel(17 "start" 482 "finish", labsize(*.5) notick) ytitle("Judge") ylabel(minmax) name(p`c', replace) nodraw ///
	legend(cols(1) order(2 "facts" 1 "briefs" 6 "trial judgment" 5 "statute" 4 "precedent") size(*.75)) 
	restore
}
grc1leg2 p1 p2 p3 p4 p5 p6 p7, scheme(s1color) xcommon rows(3) holes(4 7) imargin(tiny) graphregion(margin(r+8)) xtob1title ytol1title ring(0) pos(7) title("Fig. 1: Document View Paths") ///
	note("For each judge aligned on vertical axis, the graph plots the documents live on the screen from start to finish. Orange indicates instruction views.", size(*.73))
graph play fig1_com_civ_sides
graph save 	graphs\sequences, replace
	

/***********************************************************
	Cross-Country Comparisons Using Edit Distance
***********************************************************/

use data\sequence.dta, clear
reshape wide doc, i(country ID) j(eventtime)
sort ID
gen commonL = inlist(country,"India":country,"USA":country) + 1 // email from B Halpin 2019/12/2: sddiscrep assumes group variable starts at 1, otherwise will throw an error
foreach subscost in 1 1.5 { // 1 is Levenshtein
di _n "*** Substitution cost `subscost' ***"
matrix scost = `subscost'*(J(6,6,1) - I(6))
forvalues prime=-1/1 {
	preserve
	if `prime'>-1 {
		decode ID, gen(randomID)
		merge 1:1 random using data\maindata.dta, assert(2 3) keep(3) nogenerate keepusing(prime)
		keep if prime==`prime'
		drop prime random
		}
	sort ID
	oma doc*, subsmat(scost) indel(1) length(500) pwdist(ED) dups
	sddiscrep country, dist(ED) id(ID) niter(100000)
	sddiscrep commonL, dist(ED) id(ID) niter(1000) // problem: the test here should be for groups of countries not for groups of individuals (akin to clustering) --> permutation test in next lines
	scalar Rstar = r(pseudoR2)
	local p = 0
	cap matrix drop R2
	forvalues i=1/5 { // not running it for i=6 (-->j=7) because that is the actual test statistic
	forvalues j=`=`i'+1'/7 { 
		qui replace commonL = inlist(country,`i',`j') + 1
		qui sddiscrep commonL, dist(ED) id(ID) niter(1)
		matrix R2 = (nullmat(R2) \ r(pseudoR2))
	}
	}
	clear
	svmat R2
	count if R21>=Rstar
	di "country-permutation p-value of common v. civil law for prime=`prime': " =(r(N)+1)/(_N+1) // cf. Lehmann & Romano eq 15.5 and note N, _N do not include the actual com/civ pair
	restore
}
}

/***********************************************************
	Summary statistics & correlations with reasons
************************************************************/
use data\sequence.dta, clear
contract country ID doc, freq(t_)
replace t_=t_/500
decode doc, gen(document)
drop doc
reshape wide t_, i(country ID) j(document) string
egen total = rowtotal(t_*)
assert round(total,.000001)==1
recode t_* (. = 0)
table country, contents(mean t_precedent mean t_statute) format(%5.3f)

count if t_prec==0
di %4.2f (_N-r(N))/_N // fraction of judges consulting precedent
count if t_statute==0
di %4.2f (_N-r(N))/_N // fraction of judges consulting statute
decode ID, gen(randomID)
merge 1:1 randomID using data\maindata.dta, assert(3) keepusing(statute prec_mentioned_gen) nogenerate
gen Statute = statute/4 // resolving coder disagreement by averaging
gen Precedent = prec_mentioned_gen/4
sum Precedent Statute // fraction of judges mentioned precedent and statute
pwcorr Statute Precedent t_statute t_prec, sig
spearman Statute Precedent t_statute t_prec, stats(rho p)
	

/****************************************************************************
******************************************************************************
	II. JUDGMENT REASONS
******************************************************************************
****************************************************************************/

use statute policy prec_mentioned_gen judgmentreasons country prime using data\maindata.dta, clear
local elements "Statute Policy Precedent" 
rename (statute policy prec_mentioned_gen) (`elements') // alternative to prec_mentioned_gen: prec_mentioned_spec prec_disti -- graph qualitatively similar
gen words = wordcount(judgmentreasons)
bys country: egen cwords = mean(words)
foreach element in `elements' {
	replace `element' = `element'/4 // this deals with coder disagreement: thus far we had scored each variable by 0-4 coders checking a feature
	gen `element'_sc = `element'/cwords // this scales down a mention by the average length of reasons for that country
	local elements_sc "`elements_sc' `element'_sc"
	}
gen commonL = inlist(country,"India":country,"USA":country)

* importance of scaling
sum words
tabstat words, by(country) format(%6.2f)

/***********************************************************
	Fig. 1
************************************************************/
label list country // to check if next line is correct!
local legend legend(off) // alternatively: "legend(order(- "Civil Law:" 1 "Argentina" 2 "Brazil" 3 "China" 4 "France" 5 "Germany" - "Common Law:" 6 "India" 7 "USA") cols(6) size(*.5))" -- in that case use grc1leg instead of graph combine below
local countrylabels "xlabel(1 "AR" 2 "BR" 3 "CN" 4 "FR" 5 "DE" 6 "IN" 7 "US", labsize(*2))" // consider turning off if turning previous line on
local cibaropts "over(country) barcol(ltblue green red navy gold orange blue) vce(bootstrap) ciopts(lwidth(*4))"
local gphopts "scheme(s1color) ytitle("") xtitle("") nodraw `legend'"
cibar Statute,		`cibaropts' graphopts(`gphopts' ylabel(, labsize(*1.5))	plotregion(margin(b=0))	name(Statute	 , replace) title("Statute",   size(*2.5))) // not sure the plotregion here and imargin below have any effect
cibar Policy,		`cibaropts' graphopts(`gphopts' ylabel(,notick nolabel)	plotregion(margin(b=0))	name(Policy		 , replace) title("Policy",	   size(*2.5)))
cibar Precedent,	`cibaropts' graphopts(`gphopts' ylabel(,notick nolabel)	plotregion(margin(b=0))	name(Precedent   , replace) title("Precedent", size(*2.5)))
cibar Statute_sc,	`cibaropts' graphopts(`gphopts' ylabel(, labsize(*1.5))	`countrylabels'			name(Statute_sc  , replace) title(""))
cibar Policy_sc,	`cibaropts' graphopts(`gphopts' ylabel(,notick nolabel)	`countrylabels'			name(Policy_sc   , replace) title(""))
cibar Precedent_sc,	`cibaropts' graphopts(`gphopts' ylabel(,notick nolabel)	`countrylabels'			name(Precedent_sc, replace) title(""))
graph combine Statute Policy Precedent,			 scheme(s1color) ycommon xcommon altshrink rows(1) imargin(b=0)										 			  name(raw, replace)
graph combine Statute_sc Policy_sc Precedent_sc, scheme(s1color) ycommon xcommon altshrink rows(1) title("{it:---Scaled by Average Length (Words)---}", size(*2)) name(sc, replace)
graph combine raw sc, scheme(s1color) xcommon altshrink rows(2) title("Fig. 2: Prevalence of Reasons by Country") ///
	note("Country means and 95% bootstrap confidence intervals." ///
	"In the upper panel, the mean is taken over indicators whether an individual judge mentioned the feature in the" ///
	"reasons (scored as the number of 0-4 coders who so coded this judge, divided by four). In the lower panel, the" ///
	"mean is taken over the indicators divided by the average number of words in that country's translated reasons.")
graph play fig2_com_civ_sides
graph save graphs\reasons_prevalence_revised2.gph, replace

/***********************************************************
	Comparison: Country and Com/Civ differences
************************************************************/

* size of country vs. com/civ differences
foreach element in `elements' `elements_sc' {
	qui ttest `element', by(commonL)
	scalar comcivdiff = abs(r(mu_2)-r(mu_1))
	qui ttest `element' if commonL, by(country)
	scalar USIndiadiff = abs(r(mu_2)-r(mu_1))
	di USIndiadiff-comcivdiff
}

* tests of country differences
manova `elements' = country // to get the value of Pillai's trace etc.
permute country (e(stat_m)[2,1]), reps(10000) dots(100): manova `elements'	  = country // country test: permute Pillai's trace
manova `elements_sc' = country
permute country (e(stat_m)[2,1]), reps(10000) dots(100): manova `elements_sc' = country

* tests of common/civil differences
program define modHotelT2, rclass // modified Hotelling's T a la Chung & Romano J Econometrics 2016
		forvalues j=0/1 { // j=0/1 will call civil and common law, respectively
			qui mat accum C`j' = `0' if commonL==`j', noconstant deviations means(M`j') // matrices of (N-1)*cov and means, cf. stata.com/support/faqs/data-management/saving-stats/
			mat C`j' = C`j' / ((r(N)-1)*r(N)) // I already apply here the weight 1/n_group for the summation of the two group-specific covariance matrices
		}
		mat T2 = (M1-M0)*invsym(C0+C1)*(M1-M0)' // Chung & Romano equations 10-13 (rearranged by pulling the scale factor m^(-1/2) into the sums)
		return scalar T2 = T2[1,1]
		end
forvalues prime=-1/1 { // -1 will be all observations; 0 and 1 runs the sample separately for those that are primed and those that are not
	preserve // needed to undo next line and permutation of commonL grouping
	if `prime'>-1 keep if prime==`prime'
	local p = 1 // the actual test statistic itself is equal to itself -- and the code below saves time by not calculating that permutation
	local B = 1 // cf. prior line -- also adding to denominator
	modHotelT2 `elements'
	scalar t_star = r(T2)
	forvalues i=1/5 { // not running it for i=6 (-->j=7) because that is the actual test statistic
	forvalues j=`=`i'+1'/7 { 
		qui replace commonL = inlist(country,`i',`j')
		modHotelT2 `elements'
		if r(T2) >= t_star local p = `p' + 1
		local B = `B' + 1
		}
		}
	di "prime " `prime' ", T2 = " %4.2f t_star ", p = " %4.2f `p'/`B'
	restore
	}